# UMAPs ----
# Datasets
# Directory holding the curated CSV files. Kept in one place so the script can
# be pointed at another location by editing a single line.
data_dir <- "C:/Users/samte/OneDrive/Desktop/TFM Clustering of musical patterns/curatedData"

# Read `<data_dir>/<name>.csv` into a data frame.
read_curated <- function(name) {
  read.csv(file.path(data_dir, paste0(name, ".csv")))
}

crotches38i <- read_curated("crotches38i")
quavers38ii <- read_curated("quavers38ii")
crotches38iii <- read_curated("crotches38iii")
crotches99i <- read_curated("crotches99i")
semiquavers99ii <- read_curated("semiquavers99ii")
# UMAP embeddings of the five curated datasets ----
#
# The same pipeline is applied to every dataset, so it lives in one helper
# instead of five copies. It relies on recipe()/prep()/bake(), step_umap() and
# ggplot() being attached already; no library() calls for the packages that
# provide them are visible in this part of the file.

#' Fit a two-component UMAP on one dataset and draw it twice.
#'
#' @param data Data frame with a `performer` column, a `bar` column and the
#'   columns to embed. `bar` is dropped before fitting, so it is never used as
#'   a predictor; it is only attached afterwards for colouring.
#' @param seed Two integers handed to `step_umap(seed = )`. Fixing them makes
#'   the embedding, and therefore the figures, the same on every run.
#' @return The baked embedding (`performer`, the UMAP columns and `bar` as a
#'   factor), invisibly. Called for its side effect of printing two plots:
#'   one coloured by performer (legend on top), one by bar (legend on the left).
plot_umap_embedding <- function(data, seed = c(38L, 99L)) {
  features <- data
  features$bar <- NULL

  umap_prep <- recipe(performer ~ ., data = features) |>
    step_umap(all_predictors(), num_comp = 2, seed = seed) |>
    prep()

  # starts_with() ignores case by default, so "umap" selects UMAP1 and UMAP2.
  embedding <- bake(
    umap_prep,
    new_data = features,
    performer,
    starts_with("umap")
  )
  # Rows of `embedding` are in the same order as `data`, so bar lines up.
  embedding$bar <- as.factor(data$bar)

  by_performer <- ggplot(
    embedding,
    aes(x = UMAP1, y = UMAP2, col = performer)
  ) +
    geom_point(alpha = .5) +
    theme_bw() +
    theme(legend.position = "top") +
    coord_equal()

  by_bar <- ggplot(embedding, aes(x = UMAP1, y = UMAP2, col = bar)) +
    geom_point(alpha = .5) +
    theme_bw() +
    theme(legend.position = "left") +
    coord_equal()

  # ggplot objects are not auto-printed inside a function, so print explicitly.
  print(by_performer)
  print(by_bar)

  invisible(embedding)
}

umap_datasets <- list(
  crotches38i = crotches38i,
  quavers38ii = quavers38ii,
  crotches38iii = crotches38iii,
  crotches99i = crotches99i,
  semiquavers99ii = semiquavers99ii
)

# One embedding per dataset, in the order listed above; each call prints the
# performer-coloured plot followed by the bar-coloured plot.
umap_embeddings <- lapply(umap_datasets, plot_umap_embedding)

library(NbClust)
# NbClust on crotches99i ----
# Columns 2:7 are clustered with k-means for 2 to 10 clusters; NbClust
# evaluates all of its indices and reports the number of clusters chosen by
# majority rule. Lines starting with `##` are output recorded from a past run.
multi <- crotches99i[, 2:7]
nb99i <- NbClust(
  data = multi, diss = NULL, distance = "euclidean", min.nc = 2, max.nc = 10,
  method = "kmeans", index = "all", alphaBeale = 0.1
)
## Warning in log(det(P)/det(W)): NaNs produced
## (the warning above was emitted 42 times in the recorded run)

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##

## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## *******************************************************************
## * Among all indices:
## * 9 proposed 2 as the best number of clusters
## * 6 proposed 3 as the best number of clusters
## * 1 proposed 5 as the best number of clusters
## * 2 proposed 6 as the best number of clusters
## * 3 proposed 7 as the best number of clusters
## * 1 proposed 9 as the best number of clusters
## * 1 proposed 10 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************

# Attach the chosen partition and split the observations by cluster.
crotches99i$nbclust <- nb99i$Best.partition
cluster1 <- crotches99i[crotches99i$nbclust == 1, ]
cluster2 <- crotches99i[crotches99i$nbclust == 2, ]
table(cluster1$performer)
##
## CH DPB FB GG GGu IH MA MG ML PN PR66 PV RS SB SS
## 178 161 132 175 166 157 152 157 156 175 171 164 156 178 163
table(cluster2$performer)
##
## CH DPB FB GG GGu IH MA MG ML PN PR66 PV RS SB SS
## 97 114 78 100 109 118 123 118 119 100 104 111 119 97 112

# Per performer: observations in cluster 2 divided by observations in clusters
# 1 and 2. A single performer-by-cluster table keeps the two counts aligned by
# performer name and covers however many performers the data contain, rather
# than indexing two separate tables by position over a fixed 1:15.
cluster_counts <- table(crotches99i$performer, crotches99i$nbclust)
prop <- cluster_counts[, "2"] /
  (cluster_counts[, "1"] + cluster_counts[, "2"])

# Distribution of bar numbers within each cluster.
hist(cluster1$bar, breaks = 50)
hist(cluster2$bar, breaks = 50)

prop
# Values recorded from the original run, in the performer order of the tables
# above (`prop` now also carries the performer names):
## [1] 0.3527273 0.4145455 0.3714286 0.3636364 0.3963636 0.4290909 0.4472727
## [8] 0.4290909 0.4327273 0.3636364 0.3781818 0.4036364 0.4327273 0.3527273
## [15] 0.4072727
# NbClust on crotches38i ----
# Columns 2:9 are clustered with k-means for 2 to 10 clusters; NbClust
# evaluates all of its indices and reports the number of clusters chosen by
# majority rule. Lines starting with `##` are output recorded from a past run.
multi <- crotches38i[, 2:9]
nb38i <- NbClust(
  data = multi, diss = NULL, distance = "euclidean", min.nc = 2, max.nc = 10,
  method = "kmeans", index = "all", alphaBeale = 0.1
)
## Warning in log(det(P)/det(W)): NaNs produced
## (the warning above was emitted 56 times in the recorded run)
## Warning in max(DiffLev[, 3], na.rm = TRUE): no non-missing arguments to max;
## returning -Inf

## *** : The Hubert index is a graphical method of determining the number of clusters.
## In the plot of Hubert index, we seek a significant knee that corresponds to a
## significant increase of the value of the measure i.e the significant peak in Hubert
## index second differences plot.
##
## *** : The D index is a graphical method of determining the number of clusters.
## In the plot of D index, we seek a significant knee (the significant peak in Dindex
## second differences plot) that corresponds to a significant increase of the value of
## the measure.
##
## Warning in matrix(c(results), nrow = 2, ncol = 26): data length [51] is not a
## sub-multiple or multiple of the number of rows [2]
## Warning in matrix(c(results), nrow = 2, ncol = 26, dimnames =
## list(c("Number_clusters", : data length [51] is not a sub-multiple or multiple
## of the number of rows [2]

## *******************************************************************
## * Among all indices:
## * 2 proposed 2 as the best number of clusters
## * 1 proposed 3 as the best number of clusters
## * 1 proposed 8 as the best number of clusters
##
## ***** Conclusion *****
##
## * According to the majority rule, the best number of clusters is 2
##
##
## *******************************************************************

# Attach the chosen partition and split the observations by cluster.
crotches38i$nbclust <- nb38i$Best.partition
cluster1 <- crotches38i[crotches38i$nbclust == 1, ]
cluster2 <- crotches38i[crotches38i$nbclust == 2, ]
table(cluster1$performer)
##
## CH DPB FB FvdP GG GGr GGu IH MA MG ML MP PN PR36 PR66 PS
## 219 136 156 150 208 182 174 190 130 182 183 192 198 145 171 164
## PV RS SB SS
## 215 194 162 214
table(cluster2$performer)
##
## CH DPB FB FvdP GG GGr GGu IH MA MG ML MP PN PR36 PR66 PS
## 151 144 124 130 162 188 196 180 150 188 187 178 172 135 109 116
## PV RS SB SS
## 155 176 118 156

# Per performer: observations in cluster 2 divided by observations in clusters
# 1 and 2. This dataset has 20 performers (see the tables above); the previous
# loop over a hard-coded 1:15 silently dropped the last five. A single
# performer-by-cluster table covers all of them and keeps the two counts
# aligned by performer name.
cluster_counts <- table(crotches38i$performer, crotches38i$nbclust)
prop <- cluster_counts[, "2"] /
  (cluster_counts[, "1"] + cluster_counts[, "2"])

# Distribution of bar numbers within each cluster.
hist(cluster1$bar, breaks = 50)
hist(cluster2$bar, breaks = 50)

prop
# Values recorded from the original run; it printed only the first 15
# performers (CH to PR66) because of the old loop bound:
## [1] 0.4081081 0.5142857 0.4428571 0.4642857 0.4378378 0.5081081 0.5297297
## [8] 0.4864865 0.5357143 0.5081081 0.5054054 0.4810811 0.4648649 0.4821429
## [15] 0.3892857
# The five previously missing performers, computed from the counts in the two
# tables above: PS 0.4142857, PV 0.4189189, RS 0.4756757, SB 0.4214286,
# SS 0.4216216. `prop` now also carries the performer names.
library(kohonen)
# Self-organising maps ----
# One 20 x 20 hexagonal grid is shared by every model.
som_grid <- somgrid(xdim = 20, ydim = 20, topo = "hexagonal")

# Column subsets to train on, in the order the models are fitted.
som_inputs <- list(
  crotches99i[, 2:7],
  semiquavers99ii[, 2:17],
  crotches38i[, 2:9],
  quavers38ii[, 2:13],
  crotches38iii[, 2:9]
)

# For each subset: train for 100 iterations, then draw the default plot
# followed by the neighbour-distance (U-matrix) plot. After the loop,
# `som_model` holds the model fitted on the last subset.
for (som_input in som_inputs) {
  som_model <- som(X = as.matrix(som_input), grid = som_grid, rlen = 100)
  plot(som_model)
  plot(
    som_model,
    type = "dist.neighbours",
    main = "U-Matrix for unsupervised SOM",
    keepMargins = TRUE,
    shape = "straight"
  )
}

# UMAP of unor38i and unor38iii stacked together ----
unor38i <- read.csv("C:/Users/samte/OneDrive/Desktop/TFM Clustering of musical patterns/unor38i.csv")
unor38iii <- read.csv("C:/Users/samte/OneDrive/Desktop/TFM Clustering of musical patterns/unor38iii.csv")

# Row-bind the two data frames.
united <- rbind(unor38i, unor38iii)

# Working copy without the `bars` column.
# NOTE(review): the curated datasets earlier in this file call this column
# `bar`; if these files do too, this line removes nothing and the column
# stays in as a UMAP predictor. Confirm the column name.
sinbar <- united
sinbar[["bars"]] <- NULL

# Recipe with performer as outcome and every other column reduced to two
# UMAP components, then trained on the same data.
umap_recipe <- recipe(performer ~ ., data = sinbar)
umap_recipe <- step_umap(umap_recipe, all_predictors(), num_comp = 2)
uns_rec_prep <- prep(umap_recipe)

# Keep only performer and the UMAP columns, then plot coloured by performer.
united_umap <- bake(
  uns_rec_prep,
  new_data = sinbar,
  performer,
  starts_with("umap")
)
ggplot(united_umap, aes(x = UMAP1, y = UMAP2, col = performer)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") +
  coord_equal()

# UMAP of unor38i on its own, restricted to its first 17 columns.
unor38i_subset <- unor38i[, 1:17]

# Recipe with performer as outcome and every other column reduced to two
# UMAP components, then trained on the same data.
umap_recipe <- recipe(performer ~ ., data = unor38i_subset)
umap_recipe <- step_umap(umap_recipe, all_predictors(), num_comp = 2)
uns_rec_prep <- prep(umap_recipe)

# Keep only performer and the UMAP columns, then plot coloured by performer.
unor38i_umap <- bake(
  uns_rec_prep,
  new_data = unor38i_subset,
  performer,
  starts_with("umap")
)
ggplot(unor38i_umap, aes(x = UMAP1, y = UMAP2, col = performer)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") +
  coord_equal()

# UMAP of unor38iii on its own, restricted to its first 17 columns.
unor38iii_subset <- unor38iii[, 1:17]

# Recipe with performer as outcome and every other column reduced to two
# UMAP components, then trained on the same data.
umap_recipe <- recipe(performer ~ ., data = unor38iii_subset)
umap_recipe <- step_umap(umap_recipe, all_predictors(), num_comp = 2)
uns_rec_prep <- prep(umap_recipe)

# Keep only performer and the UMAP columns, then plot coloured by performer.
unor38iii_umap <- bake(
  uns_rec_prep,
  new_data = unor38iii_subset,
  performer,
  starts_with("umap")
)
ggplot(unor38iii_umap, aes(x = UMAP1, y = UMAP2, col = performer)) +
  geom_point(alpha = .5) +
  theme_bw() +
  theme(legend.position = "top") +
  coord_equal()
